# ------------------------------------------------------------------------------
# Prints all cost effectiveness stats used in paper
# Author: Cassidy Shubatt <cshubatt@gmail.com>
# To run: bash 02_cost_effectiveness_stats.sh
# ------------------------------------------------------------------------------

# Libraries --------------------------------------------------------------------
library(here)
library(yaml)
library(data.table)
library(scales)
library(tidyverse)
library(glue) # glue

u <- modules::use(here::here("lib", "util.R"))
temp <- here("code", "04_cost_effectiveness", "temp")

# Load Data --------------------------------------------------------------------
message("Loading data...")
paths <- read_yaml(here("lib", "filepaths.yml"))
overnight_lab <- ""
daly_cost <- readRDS(paths$analysis$daly_cost) %>%
  select(ed_enc_id, cost, daly_100, daly_200, daly_250, daly_300, daly_500)
cohort <- readRDS(glue(paths$analysis$test_cohort)) %>%
  u$safe_left_join(daly_cost) %>%
  filter(!exclude)

# Cost Eff by Decile -----------------------------------------------------------
message("Getting cost effectiveness table by quintile...")
tested <- filter(cohort, test_010_day)
cost_eff_tbl <- tested %>%
  group_by(tile_stent_or_cabg_005_tested) %>%
  summarize(
    n = n(),
    mean_risk = mean(p__ensemble__stent_or_cabg_010_day__tested),
    avg_cost = mean(cost, na.rm = TRUE),
    avg_daly_250 = mean(daly_250, na.rm = TRUE),
    cost_per_daly_250 = sum(cost, na.rm = TRUE) / sum(daly_250, na.rm = TRUE),
  ) %>%
  ungroup()

print(cost_eff_tbl)
overall_eff <- sum(tested$cost, na.rm = TRUE) / sum(tested$daly_250, na.rm = TRUE)
overall_eff_lo <- sum(tested$cost, na.rm = TRUE) / sum(tested$daly_300, na.rm = TRUE)
overall_eff_hi <- sum(tested$cost, na.rm = TRUE) / sum(tested$daly_200, na.rm = TRUE)
message(
  "Overall cost effectiveness: ", trunc(overall_eff, 0), "(",
  trunc(overall_eff_lo, 0), "-", trunc(overall_eff_hi, 0), ")"
)

bottom_40 <- filter(tested, tile_stent_or_cabg_005_tested <= 2)
bottom_40_ce <- sum(bottom_40$cost, na.rm = TRUE) / sum(bottom_40$daly_250, na.rm = TRUE)
message("Cost-effectiveness in bottom 40%: ", bottom_40_ce)

bottom_50 <- filter(tested, tile_stent_or_cabg_010_tested <= 5)
bottom_50_ce <- sum(bottom_50$cost, na.rm = TRUE) / sum(bottom_50$daly_250, na.rm = TRUE)
message("Cost-effectiveness in bottom 50%: ", bottom_50_ce)

bottom_60 <- filter(tested, tile_stent_or_cabg_005_tested <= 3)
bottom_60_ce <- sum(bottom_60$cost, na.rm = TRUE) / sum(bottom_60$daly_250, na.rm = TRUE)
message("Cost-effectiveness in bottom 60%: ", bottom_60_ce)

top_50 <- filter(tested, tile_stent_or_cabg_010_tested > 5)
top_50_ce <- sum(top_50$cost, na.rm = TRUE) / sum(top_50$daly_250, na.rm = TRUE)
message("Cost-effectiveness in top 50%: ", top_50_ce)

# Interpolating polynomial -----------------------------------------------------
message("Interpolating risk to find 150K risk threshold...")
risk <- cost_eff_tbl$mean_risk

# only works if 150K threshold is between Q3 and Q4
fit <- lm(cost_per_daly_250 ~ mean_risk, data = cost_eff_tbl[3:4, ])
risk_cutoff <- (150000 - coef(fit)["(Intercept)"]) / coef(fit)["mean_risk"]
write_rds(risk_cutoff, paths$analysis$costeff_risk_cutoff)

message("150K risk threshold = ", risk_cutoff)
message("")
message("Cost-eff stats from tested:")

message("")
tested <- filter(cohort, test_010_day)
kept_tests <- filter(tested, p__ensemble__stent_or_cabg_010_day__tested >= risk_cutoff)
dropped_tests <- filter(tested, p__ensemble__stent_or_cabg_010_day__tested < risk_cutoff)
message("Num tests above threshold: ", nrow(kept_tests))
message("Pct of tests above threshold: ", nrow(kept_tests)/nrow(tested))
message("LY saved by kept tests: ", sum(kept_tests$daly_250))
message("Total cost of kept tests: ", sum(kept_tests$cost))
message(
  "Cost-effectiveness of kept tests: ",
  sum(kept_tests$cost)/sum(kept_tests$daly_250)
)

message("")
tested_Q5 <- filter(tested, tile_stent_or_cabg_005_tested == 5)
message("Num Q5 tests: ", nrow(tested_Q5))
message("LY saved by Q5 tests: ", sum(tested_Q5$daly_250))
message("Total cost of Q5 tests: ", sum(tested_Q5$cost))
message(
  "Cost-effectiveness of Q5 tests: ",
  sum(tested_Q5$cost)/sum(tested_Q5$daly_250)
)

message("")
# comes from natural experiment test-set simulation
# scaled down by size of holdout relative to (holdout + train)
num_added_tests <- 279.2
pct_new_tests <- num_added_tests/(num_added_tests + nrow(kept_tests))
new_old_ratio <- num_added_tests/nrow(tested_Q5)
ly_new <- sum(tested_Q5$daly_250)*new_old_ratio
cost_new <- sum(tested_Q5$cost)*new_old_ratio

ly_total = ly_new + sum(kept_tests$daly_250)
cost_total = cost_new + sum(kept_tests$cost)
costeff_comb_new = cost_total/ly_total
message("Num tests in preferred regime: ", num_added_tests + nrow(kept_tests))
message("Pct of tests in preferred regime that are new: ", pct_new_tests)
message("Cost-eff of tests under preferred regime: ", costeff_comb_new)

message("")
message("Num tests below threshold: ", nrow(dropped_tests))
message("Pct of tested below 150K est cutoff: ", nrow(dropped_tests)/nrow(tested))
message("Total LY given up in dropped tests", sum(dropped_tests$daly_250))
message("Total cost of dropped tests: ", sum(dropped_tests$cost))
message(
  "Forgone treatments from dropped tests: ",
  sum(dropped_tests$stent_or_cabg_010_day)
)
message("150K value of Life-Years given up: ", 150000 * sum(dropped_tests$daly_250))
message(
  "Cost-effectiveness of dropped tests: ",
  sum(dropped_tests$cost)/sum(dropped_tests$daly_250)
)

message("")
message("Cost-eff stats on untested:")
untested <- filter(cohort, !test_010_day)
costeff_untested <- untested %>%
  filter(p__ensemble__stent_or_cabg_010_day__tested > risk_cutoff)

message(
  "Pct of untested above 150K cost-eff threshold: ",
  nrow(costeff_untested) / nrow(untested)
)
message("Num of untested above costeff threshold: ", nrow(costeff_untested))
message(
  "Expected num yields in untested, costeff patients: ",
  sum(costeff_untested$p__ensemble__stent_or_cabg_010_day__tested)
)

# Q5 upper bound ---------------------------------------------------------------
message("Getting upper bounds on cost-effectiveness of risk-Q5 tests...")
Q5 <- filter(tested, tile_stent_or_cabg_005_tested == 5)
total_cost <- sum(Q5$cost, na.rm = T)
total_daly_200 <- sum(Q5$daly_200, na.rm = T)
total_daly_100 <- sum(Q5$daly_100, na.rm = T)
message("Q5 cost-effectiveness 10%: ", total_cost / total_daly_100)
message("Q5 cost-effectiveness 20%: ", total_cost / total_daly_200)

message("Done.")
